package com.galeno.test

import com.galeno.utils.SparkUtil
import org.apache.spark.{Aggregator, HashPartitioner}
import org.apache.spark.rdd.{RDD, ShuffledRDD}

import scala.collection.mutable.ArrayBuffer

/**
 * @Title: GroupByKeyDemo
 * @Description: Demonstrates re-implementing RDD.groupByKey with a raw ShuffledRDD.
 * @author galeno
 * @date 2021/8/31 21:07
 */
object GroupByKeyDemo {

  /**
   * Entry point: builds a word/count-1 pair RDD and re-implements the effect of
   * `groupByKey` by hand with a [[ShuffledRDD]], then triggers the job and prints
   * each (key, buffer-of-ones) pair.
   *
   * Bug fixed vs. the original: the ShuffledRDD was constructed with only a
   * partitioner — without an [[Aggregator]] (and with map-side combine left at
   * its default) it does NOT reproduce groupByKey, and no action was ever run,
   * so the pipeline was dead code.
   *
   * @param args unused command-line arguments
   */
  def main(args: Array[String]): Unit = {
    val sc = SparkUtil.getSc
    try {
      val words = sc.parallelize(
        List(
          "spark", "hadoop", "hive", "spark",
          "spark", "flink", "spark", "hbase",
          "kafka", "kafka", "kafka", "kafka",
          "hadoop", "flink", "hive", "flink"
        ), 4
      )
      val wordAnd1: RDD[(String, Int)] = words.map((_, 1))

      // Equivalent built-in call: wordAnd1.groupByKey(wordAnd1.partitions.length)
      //
      // Hand-rolled version: shuffle by key, collecting every value for a key
      // into an ArrayBuffer[Int] on the reduce side.
      val shuffledRdd: ShuffledRDD[String, Int, ArrayBuffer[Int]] =
        new ShuffledRDD[String, Int, ArrayBuffer[Int]](
          wordAnd1,
          new HashPartitioner(wordAnd1.partitions.length)
        )

      // groupByKey never combines map-side: combining buffers before the
      // shuffle would not reduce data volume, only add overhead.
      shuffledRdd.setMapSideCombine(false)

      // createCombiner: first value for a key seeds a new buffer.
      // mergeValue:     subsequent values are appended to the buffer.
      // mergeCombiners: buffers from different map outputs are concatenated.
      shuffledRdd.setAggregator(new Aggregator[String, Int, ArrayBuffer[Int]](
        (v: Int) => ArrayBuffer(v),
        (buf: ArrayBuffer[Int], v: Int) => buf += v,
        (b1: ArrayBuffer[Int], b2: ArrayBuffer[Int]) => b1 ++= b2
      ))

      // Trigger the job so the shuffle actually runs, and show the grouping.
      shuffledRdd.collect().foreach(println)
    } finally {
      // Always release the SparkContext, even if the job fails.
      sc.stop()
    }
  }
}
